# This analysis was conducted using RStudio 2021.09.1+372 "Ghost Orchid" for Mac and R version 4.1.2 for Mac
sink(file = "log.txt")
install.packages("pacman")
pacman::p_load(foreign, ggplot2, plm, reshape2, countrycode, sandwich, lmtest, MASS, devtools, etwfe,
               rworldmap, RColorBrewer, states, mice, VIM, stargazer, margins, clusterSEs, lme4, optimx,
               coefplot, lattice, survey, dplyr, wordcloud, survey, sampleSelection, quanteda,
               Matrix, ldatuning, topicmodels, readtext, stm, lda, bursts, tidytext, PanelMatch,
               tm, readstata13, fastDummies, panelView, igraph, NetMix, dnr, network, sna, bacondecomp,
               pdftools, stringr, hunspell, wordcloud2, forcats, tidyverse, tidytext, stringr, scales, textreuse,
               text2vec, ggrepel, webshot, htmlwidgets, lfe, VGAM, AER, fixest, caTools, textdata, arm, haven)

setwd("ENTER DIRECTORY") # set working directory (change to your directory)

#### Create Text Datasets (RUN ONLY FOR DATA GENERATION) ####

##->## DO NOT RUN THIS. 

### NOTE THAT THE ORIGINAL DOCUMENTS ARE NOT INCLUDED IN THE REPLICATION
### MATERIALS DUE TO SIZE LIMITS. THEY CAN BE ACCESSED AT THE FOLLOWING
### LINK FOR THOSE THAT ARE INTERESTED: https://www.dropbox.com/sh/4okvdsefletl8xx/AABCNOqmOzzmVZEFEWsV4c-Oa?dl=0
### OTHERWISE SKIP TO LINE 55 TO IMPORT THE CLEAN .CSV FILE WITH ALL TEXT

text <- data.frame(matrix(NA, nrow = 54882, ncol = 3)) # create empty data frame for text data
colnames(text) <- c("year", "ctry", "text") # rename cols
k <- 1 # set counter variable
for (i in c(1987:2017)) {
  setwd(file.path("~/Downloads/Replication Materials/GRAYs/", i)) # change path if necessary
  filepath <- getwd()
  files <- list.files(filepath, pattern = '*.PDF|*.pdf', full.names = T)
  dat <- lapply(files, pdf_text)
  for (j in 1:(length(files)-1)) {
    text$year[k] <- str_sub(files[j], start = -9, end = -6)
    text$ctry[k] <- str_sub(files[j], start = 68)
    text$text[k] <- dat[j]
    k <- ifelse(k < 54882, k + 1, k)
  }
} # write text to data frame with relevant attributes
text$ctry <- gsub(").*", "", text$ctry) # subset ctry variable
text$ctry <- gsub("^.*\\(","", text$ctry ) # subset ctry variable
text$ccode <- countrycode(text$ctry, origin = "country.name", destination = "iso3c") # create country code
text <- text[!is.na(text$ccode),] # drop documents with missingness
text$text <- as.character(text$text) # change data format
Corpus <- Corpus(VectorSource(text$text)) # convert to corpus
Corpus.clean <- tm_map(Corpus, stripWhitespace) # strip white space
Corpus.clean <- tm_map(Corpus.clean, removePunctuation) # remove punctuation
Corpus.clean <- tm_map(Corpus.clean, tolower) # convert to lower case
Corpus.clean <- tm_map(Corpus.clean, removeNumbers) # delete numbers
Corpus.clean <- tm_map(Corpus.clean, removeWords, stopwords("english")) # delete stop words
dataframe <- data.frame(text=unlist(sapply(Corpus.clean, `[`)), stringsAsFactors=F) # convert to frame
text <- text[-c(3)] # remove raw text column
text$text <- dataframe$text # write clean text back to frame
text <- text[!duplicated(text$text),] # remove duplicates
# write.csv(text, "cleantext.csv") # write to csv; toggled off for now

##->## START RUNNING HERE
text <- read.csv("cleantext.csv") # load data generated from raw Grays above
text <- text[text$year %in% c(1987:2017),] # restrict to years with coverage on DV

# prep data for country-level work
text$count <- 1 # count variable to aggregate
ctryagg <- aggregate(text$text, list(text$ccode, text$ctry, as.numeric(text$year)), paste, collapse=" ") # aggregate by country-year
colnames(ctryagg) <- c("ccode", "ctry", "year", "text") # rename cols
ctryagg2 <- aggregate(text$count, list(text$ccode, as.numeric(text$year)), sum)
colnames(ctryagg2) <- c("ccode", "year", "count")
ctryagg <- merge(ctryagg, ctryagg2, by = c("ccode", "year"), all.x=T)

# country-level sentiment terms
ctryagg$positive <- NA # create sentiment variables
ctryagg$negative <- NA
ctryagg$sentiment <- NA

for (i in 1:1174) {
  tokens <- data_frame(text = ctryagg$text[i]) %>% unnest_tokens(word, text)
  imf_sentiment <- tokens %>%
    inner_join(get_sentiments("loughran")) %>%
    count(sentiment) %>%
    spread(sentiment, n, fill = 0)
  ctryagg$positive[i] <- ifelse("positive" %in% colnames(imf_sentiment), imf_sentiment$positive[1], 0)
  ctryagg$negative[i] <- ifelse("negative" %in% colnames(imf_sentiment), imf_sentiment$negative[1], 0)
  ctryagg$sentiment[i] <- ((ctryagg$positive[i] - ctryagg$negative[i]) / (ctryagg$positive[i] + ctryagg$negative[i]))
} # find sentiment data for each country-year using loughran financial dictionary

# write.csv(text, "text_data_for_analysis.csv") # write to csv toggled off for now

#### Create Empirical Datasets (RUN ONLY FOR DATA GENERATION) ####
##->## Create IMF power asymmetry variable
quota <- read.csv("quota.csv") # load data on imf quotas 1974-2014, downloaded from IMF
quota2 <- read.csv("quota2.csv") # load data on imf quotas 2015-2017, hand-coded from IMF online resources
quota3 <- read.csv("quota3.csv") # load data on imf quotas 2018-2019, hand-coded from IMF online resources
colnames(quota) <- c("ctry", 1974:2014) # create vote share variable from here down
quota <- melt(quota, id.vars=c("ctry")) # flip to correct format
colnames(quota) <- c("ctry", "year", "quota") # change col names
quota$year <- as.numeric(as.character(quota$year)) # change data format
quota$ccode <- countrycode(quota$ctry, "country.name", "iso3c") # add country codes
quota2$ccode <- countrycode(quota2$ctry, "country.name", "iso3c")
quota3$ccode <- countrycode(quota3$ctry, "country.name", "iso3c")
quotafull <- rbind(quota, quota2, quota3) # merges all years of imf quota data into one frame
basic <- read.csv("basicvotes.csv") 
basic <- basic[-c(3)] # loads in data on basic votes downloaded from IMF
# Voting power in the IMF is based on a quota system. Each member has a number of basic votes 
# Each member's number of basic votes equals 5.502% of the total votes
# Total votes are equal to basic votes + quota share votes (1 per 100,000 SDR in quota)
# Systematic vote data not available per IMF, so I created the variable based on this formula 
# for 2008-2016. Before 2008, each country just got 250 basic votes plus 1 per 100,000 SDR in quota
# so I use that formula for the pre-2008 period

quotafull <- merge(quotafull, basic, by = "year") # merges data
quotafull <- quotafull[!quotafull$quota == 0,] # removes non-members
quotafull <- na.omit(quotafull) # removes NAs
quotafull$votes <- quotafull$basicvotes + (quotafull$quota / 100000) # calculates votes
for (i in 1:7665) {
  year = quotafull$year[i]
  temp = sum(quotafull$votes[quotafull$year == year])
  quotafull$voteshare[i] <- (quotafull$votes[i] / temp)  
} # calculates and writes vote shares

gdp <- read.csv("gdp.csv") # create gdp share variable from here down, gdp data comes from WDI downloaded April 2019
gdp <- gdp[-c(2)] # delete extra col
colnames(gdp) <- c("year", "ctry", "ccode", "gdp") # change col names
gdp$year <- as.numeric(as.character(gdp$year)) # change format
gdp$gdp <- as.numeric(as.character(gdp$gdp))
gdp$ccode <- as.character(gdp$ccode)
gdp <- gdp[-c(2)] # deletes extra col
quotagdp <- merge(quotafull, gdp, by = c("ccode", "year"), all.x = TRUE) # merge data
quotagdp <- quotagdp[!quotagdp$year %in% 2018:2019,] # remove years for which gdp data systematically not available
quotagdpimp <- mice(quotagdp, method = "cart", seed = 500, m = 1,
                    pred=quickpred(quotagdp, exclude=c("ctry"))) # impute remaining missing gdp data so there are no missing gdp share values
quotagdpimp <- mice::complete(quotagdpimp) 
for (i in 1:7289) {
  year = quotagdpimp$year[i]
  temp = sum(quotagdpimp$gdp[quotagdpimp$year == year])
  quotagdpimp$gdpshare[i] <- (quotagdpimp$gdp[i] / temp)  
} # calculate gdp share
quotagdpimp$asymm <- quotagdpimp$gdpshare - quotagdpimp$voteshare # calculates asymmetry
# write.csv(quotagdpimp, "powerasymm.csv") # writes data to csv; toggled off for now

##->## Load covariate data and format it appropriately 
# Economic data
wdi <- read.csv("wdi_11.7.19.csv") # load data for relevant controls downloaded from WDI on 11/7/19
wdi <- wdi[-c(2)] # delete extra col
colnames(wdi) <- c("year", "ctry", "ccode", "gdp", "gdpgrow", "reserves", "receipts", "payments", "curraccgdp", "pop")
wdi$year <- as.numeric(as.character(wdi$year)) # change data format
wdi$ccode <- as.character(wdi$ccode)
wdi$gdp <- as.numeric(as.character(wdi$gdp))
wdi$gdpgrow <- as.numeric(as.character(wdi$gdpgrow))
wdi$reserves <- as.numeric(as.character(wdi$reserves))
wdi$receipts <- as.numeric(as.character(wdi$receipts))
wdi$payments <- as.numeric(as.character(wdi$payments))
wdi$curraccgdp <- as.numeric(as.character(wdi$curraccgdp))
wdi$pop <- as.numeric(as.character(wdi$pop))
wdi2 <- read.csv("wdi_11.2.21.csv") # load data for relevant controls downloaded from WDI on 11/2/21
wdi2 <- wdi2[-c(2)] # delete extra col
colnames(wdi2) <- c("year", "ctry", "ccode", "reservesgni", "dservgni", "dservexp", "unemployment", "wbcomm")
wdi2$year <- as.numeric(as.character(wdi2$year)) # change data format
wdi2$ccode <- as.character(wdi2$ccode)
wdi2$reservesgni <- as.numeric(as.character(wdi2$reservesgni))
wdi2$dservgni <- as.numeric(as.character(wdi2$dservgni))
wdi2$dservexp <- as.numeric(as.character(wdi2$dservexp))
wdi2$unemployment <- as.numeric(as.character(wdi2$unemployment))
wdi2$wbcomm <- as.numeric(as.character(wdi2$wbcomm))
wdi <- merge(wdi, wdi2, by = c("ccode", "year"), all.x = T)

# UN voting data
load("Dyadicdata.RData") # load dyadic UN voting data downloaded October 2018
UNGA <- x # assign to name
UNGA_US <- UNGA[UNGA$ccode1 == 2,] # subset to relations with US
UNGA_US$ccode <- countrycode(UNGA_US$ccode2, origin = "cown", destination = "iso3c") # build ccode
UNGA_US <- UNGA_US[-c(1:2,4:5,7:21)] # delete excess columns
UNGA_US <- aggregate(list(UNGA_US$absidealdiff, UNGA_US$absidealimportantdiff), 
                     by = list(UNGA_US$ccode, UNGA_US$year), FUN = mean) # eliminates issue with duplicates
colnames(UNGA_US) <- c("ccode", "year", "absidealdiff", "absidealimportantdiff")
wdi <- merge(wdi, UNGA_US, by = c("ccode","year"), all.x = T) # merge

# Democracy scores
polity <- read.csv("p4v2017.csv") # load polity data downloaded October 2018
polity$ccode <- countrycode(polity$ctry, origin = "country.name", destination = "iso3c") # build ccode
polity <- polity[-c(2)] # delete unnecessary column
polity <- aggregate(list(polity$polity2), 
                    by = list(polity$ccode, polity$year), FUN = mean) # eliminates issue with duplicates
colnames(polity) <- c("ccode", "year", "polity2")
wdi <- merge(wdi, unique(polity), by = c("ccode","year"), all.x = T) # merge  

# US aid data
usaid <- read.csv("OECDaid.csv") # load OECD US aid data downloaded December 2019
usaid <- usaid[c(2,12,19)] # delete extra cols
colnames(usaid) <- c("ccode","year","USaid") # rename cols
usaid$ccode <- countrycode(usaid$ccode, origin = "country.name", destination = "iso3c")
wdi <- merge(wdi, unique(usaid), by = c("ccode", "year"), all.x = TRUE) # merge data

# Trade
ustrade <- read.csv("Dyadic_COW_4.0.csv") # load COW 4.0 dyadic trade data downloaded January 2019
ustrade <- ustrade[ustrade$ccode1 == 2,] # restrict to dyadic US
ustrade$tottrade <- ustrade$flow1 + ustrade$flow2 # calculate total trade
ustrade <- ustrade[c(2,3,24)]
colnames(ustrade) <- c("ccode", "year", "tottrade")
ustrade$ccode <- countrycode(ustrade$ccode, origin = "cown", destination = "iso3c")
wdi <- merge(wdi, unique(ustrade), by = c("ccode", "year"), all.x = TRUE) # merge data

# UNSC membership
unsc <- read.csv("unsc.csv") # load data on UNSC membership from Dreher, Sturm, and Vreeland (2009) 
# downloaded in November 2018; I hand-coded the more recent years from UN online materials to bring the data up to date
unsc$ccode <- countrycode(unsc$ctry, origin = "country.name", destination = "iso3c") # harmonize country codes
unsc$unsc[unsc$unsc == "."] <- 0 # change empty observations to 0
unsc <- na.omit(unsc) # omit NAs (states that no longer exist)
unsc <- unsc[-c(1)] # delete extra col
unsc$unsc <- as.numeric(as.character(unsc$unsc))
unsc <- aggregate(unsc$unsc, by = list(unsc$ccode, unsc$year), FUN = max)
colnames(unsc) <- c("ccode", "year", "unsc")
wdi <- merge(unique(wdi), unique(unsc), by = c("ccode", "year"), all.x = TRUE) # merge data
wdi$unsc[is.na(wdi$unsc)] <- 0 # change NAs to 0 because missingness denotes not a member
wdi <- wdi[-c(1),] # deletes blank rows
colnames(wdi)[3] <- "ctry"
wdi <- wdi[-c(11)] # delete extra ctry col

# Impute
wdiimp <- mice(wdi, method = "cart", seed = 500, m = 1,
               pred=quickpred(wdi, exclude=c("ctry"))) # impute missing data
wdiimp <- mice::complete(wdiimp)

##->## Load additional datasets needed for analysis
imfasymm <- read.csv("powerasymm.csv") # load imf power asymmetry data, which was created above
imfmem <- read.csv("imf_membership.csv") # load hand coded data on membership in BoP IOs (Pevehouse doesn't cover these, comes from Clark 2022)

##->## Merge together various datasets for IMF analysis
imfasymm <- imfasymm[-c(1,4,7,10)] # delete extra cols
imfmem[is.na(imfmem)] <- 0 # code NAs as 0 because they are not members
imfasymm <- merge(imfasymm, imfmem, by = c("ccode", "year"))
finmemvotefull <- merge(imfasymm, wdiimp, by = c("ccode", "year")) # merge data
tempdv <- finmemvotefull[c(1,2,5,6)] # create temp dataframe for lagging
finmemvotefulllag <- finmemvotefull # duplicate for lagging
finmemvotefulllag$year <- finmemvotefulllag$year + 1 # add one year for lagging
finmemvotefulllag <- finmemvotefulllag[-c(5,6)] # eliminate extra cols
finmemvotefulllag <- merge(finmemvotefulllag, tempdv, by = c("ccode", "year")) # merge data back in
finmemvotefulllag$ccode <- countrycode(finmemvotefulllag$ccode, origin = "iso3c", destination = "cown") # change format
finmemvotefulllag$ccode <- as.numeric(finmemvotefulllag$ccode)

IMF <- read.csv("IMF_cond.csv") # Load the conditions tab from Kentikelenis et al. (2016), which I use to determine which countries are in an IMF program in a given year
IMF$count <- 1 # create a counter variable equal to 1 for each condition
for (i in 1:32261){
  IMF$DEB[i] <- ifelse(IMF$Policy.Area..short.[i] == "DEB", 1, 0)
  IMF$FIN[i] <- ifelse(IMF$Policy.Area..short.[i] == "FIN", 1, 0)
  IMF$FP[i] <- ifelse(IMF$Policy.Area..short.[i] == "FP", 1, 0)
  IMF$EXT[i] <- ifelse(IMF$Policy.Area..short.[i] == "EXT", 1, 0)
  IMF$RTP[i] <- ifelse(IMF$Policy.Area..short.[i] == "RTP", 1, 0)
  IMF$SOE[i] <- ifelse(IMF$Policy.Area..short.[i] == "SOE", 1, 0)
  IMF$LAB[i] <- ifelse(IMF$Policy.Area..short.[i] == "LAB", 1, 0)
  IMF$PRI[i] <- ifelse(IMF$Policy.Area..short.[i] == "PRI", 1, 0)
  IMF$SP[i] <- ifelse(IMF$Policy.Area..short.[i] == "SP", 1, 0)
  IMF$POV[i] <- ifelse(IMF$Policy.Area..short.[i] == "POV", 1, 0)
  IMF$INS[i] <- ifelse(IMF$Policy.Area..short.[i] == "INS", 1, 0)
  IMF$ENV[i] <- ifelse(IMF$Policy.Area..short.[i] == "ENV", 1, 0)
  IMF$OTH[i] <- ifelse(IMF$Policy.Area..short.[i] == "OTH", 1, 0)
} # code each condition by policy category
IMFagg <- aggregate(list(IMF$count, IMF$DEB, IMF$FIN, IMF$FP, IMF$EXT, IMF$RTP, IMF$SOE, IMF$LAB, 
                         IMF$PRI, IMF$SP, IMF$POV, IMF$INS, IMF$ENV, IMF$OTH), by = list(IMF$Country.Name, 
                                                                                         IMF$Country.Code, 
                                                                                         IMF$Arrangement.Date, 
                                                                                         IMF$Arrangement.ID, 
                                                                                         IMF$Year, 
                                                                                         IMF$Arrangement.Duration,
                                                                                         IMF$Arrangement.Type,
                                                                                         IMF$Arrangement.Amount..SDR), 
                    FUN = sum) # aggregate conditions summing by project
colnames(IMFagg) <- c("ctry", "ccode", "date", "ID", "year", "dur", "type", "loansize", "count", "DEB", "FIN", "FP", "EXT", "RTP", 
                      "SOE", "LAB", "PRI", "SP", "POV", "INS", "ENV", "OTH") # change column names
IMFagg$type <- as.character(IMFagg$type)
strings <- c("SBA", "EFF") # use this to filter to BoP projects
IMFagg <- IMFagg %>% 
  filter(str_detect(type, paste(strings, collapse = "|"))) 
IMFagg$dur <- gsub(",.*$", "", IMFagg$dur) # clean duration column
IMFagg$dur <- round(as.numeric(IMFagg$dur) / 12) # calculate roughly how many years project lasts
# write.csv(finmemvotefulllag, "imfreformfull.csv") # write to file; turned off for now

##->## Repeat for non-inputed control variables
finmemvotefull <- merge(imfasymm, wdi, by = c("ccode", "year")) # merge data
tempdv <- finmemvotefull[c(1,2,5,6)] # create temp dataframe for lagging
finmemvotefulllag <- finmemvotefull # duplicate for lagging
finmemvotefulllag$year <- finmemvotefulllag$year + 1 # add one year for lagging
finmemvotefulllag <- finmemvotefulllag[-c(5,6)] # eliminate extra cols
finmemvotefulllag <- merge(finmemvotefulllag, tempdv, by = c("ccode", "year")) # merge data back in
finmemvotefulllag$ccode <- countrycode(finmemvotefulllag$ccode, origin = "iso3c", destination = "cown") # change format
finmemvotefulllag$ccode <- as.numeric(finmemvotefulllag$ccode)
# write.csv(finmemvotefulllag, "imfreformsub.csv") # write to file; turned off for now

##->## Pre-processing
imfimp <- read.csv("imfreformfull.csv") # load in the datasets which were built above
imfimp$ccode <- countrycode(imfimp$ccode, origin = "cown", destination = "iso3c")
imfnoimp <- read.csv("imfreformsub.csv")
imfnoimp$ccode <- countrycode(imfnoimp$ccode, origin = "cown", destination = "iso3c")
imfimp <- imfimp[-c(1,4:6,8:15,17,20:23,25,27,29,36,37)]
imfnoimp <- imfnoimp[-c(1,4:6,8:15,17,20:23,25,27,29,36,37)] # delete extra cols
colnames(ctryagg)[5] <- "countsp"
imfimp <- merge(unique(imfimp), unique(ctryagg), by = c("ccode", "year"), all.x = T)
imfnoimp <- merge(unique(imfnoimp), unique(ctryagg), by = c("ccode", "year"), all.x = T)
imfimp <- merge(unique(imfimp), unique(IMFagg), by = c("ccode", "year"), all.x=T)
imfnoimp <- merge(unique(imfnoimp), unique(IMFagg), by = c("ccode", "year"), all.x=T)
imfimp$gdp <- log1p(imfimp$gdp / 1000000) # logged millions
imfimp$pop <- log1p(imfimp$pop / 1000000)
imfimp$USaid[imfimp$USaid < 0] <- 0 # eliminate negative aid figures
imfimp$USaid <- log1p(imfimp$USaid) # log aid
imfimp$tottrade[imfimp$tottrade < 0] <- 0 # same for trade
imfimp$tottrade <- log1p(imfimp$tottrade)
imfimp$sentiment <- scales::rescale(imfimp$sentiment) # rescale sentiment and asymm
imfimp$asymm <- scales::rescale(imfimp$asymm)
imfimp$count[is.na(imfimp$count)] <- 0 # NA counts of conditions are 0
imfimp$countsp[is.na(imfimp$countsp)] <- 0 # NA counts of grays are 0
imfimp$IMFpart <- ifelse(imfimp$count > 0, 1, 0) # create IMF program binary
imfimp$constit <- ifelse(imfimp$ccode %in% c("USA", "DEU", "CHN", "JPN", "GBR", "FRA", "SAU"), 0, 1) # create binary for constituency member

imfnoimp$gdp <- log1p(imfnoimp$gdp / 1000000)
imfnoimp$pop <- log1p(imfnoimp$pop / 1000000)
imfnoimp$USaid[imfnoimp$USaid < 0] <- 0
imfnoimp$USaid <- log1p(imfnoimp$USaid)
imfnoimp$tottrade[imfnoimp$tottrade < 0] <- 0
imfnoimp$tottrade <- log1p(imfnoimp$tottrade)
imfnoimp$sentiment <- scales::rescale(imfnoimp$sentiment)
imfnoimp$asymm <- scales::rescale(imfnoimp$asymm)
imfnoimp$count[is.na(imfnoimp$count)] <- 0
imfnoimp$countsp[is.na(imfnoimp$countsp)] <- 0
imfnoimp$IMFpart <- ifelse(imfnoimp$count > 0, 1, 0)
imfnoimp$constit <- ifelse(imfnoimp$ccode %in% c("USA", "DEU", "CHN", "JPN", "GBR", "FRA", "SAU"), 0, 1)

# merge in DPI L/R political ideology
dpi <- read.csv("DPI2015.csv") # downloaded in December 2020 from DPI
dpi <- dpi[c(1,3,15)]
dpi$ccode <- countrycode(dpi$countryname, origin = "country.name", destination = "iso3c")
dpi <- dpi[-c(1)]
dpi <- aggregate(dpi$execrlc, by = list(dpi$ccode, dpi$year), FUN = max) # eliminates issue with duplicates
colnames(dpi) <- c("ccode", "year", "execrlc")
dpi$execrlc[dpi$execrlc == -999] <- NA
imfimp <- merge(unique(imfimp), unique(dpi), all.x=T)
imfnoimp <- merge(unique(imfnoimp), unique(dpi), all.x=T)
imfimp2 <- imfimp
imfnoimp2 <- imfnoimp

imfimp2$gdppc <- imfimp2$gdp / imfimp2$pop # create state capacity measure
imfnoimp2$gdppc <- imfnoimp2$gdp / imfnoimp2$pop
imfimp2$positive <- ifelse(is.na(imfimp2$text), NA, imfimp2$positive) # designate sentiment as NA if no Grays
imfnoimp2$positive <- ifelse(is.na(imfnoimp2$text), NA, imfnoimp2$positive)
imfimp2$negative <- ifelse(is.na(imfimp2$text), NA, imfimp2$negative)
imfnoimp2$negative <- ifelse(is.na(imfnoimp2$text), NA, imfnoimp2$negative)
imfimp2$sentiment <- ifelse(is.na(imfimp2$text), NA, imfimp2$sentiment)
imfnoimp2$sentiment <- ifelse(is.na(imfnoimp2$text), NA, imfnoimp2$sentiment)
imfimp2 <- imfimp2[imfimp2$year > 1989,] # restrict to years with full coverage on Grays
imfnoimp2 <- imfnoimp2[imfnoimp2$year > 1989,]

### merge in populism data
# Funke et al. 2022 forthcoming in AER
funke <- read.csv("funke_data.csv")
funke$ccode <- countrycode(funke$ctry, origin = "country.name", destination = "iso3c")
funke <- funke[c(2,3,4,5)]
imfimp2 <- left_join(imfimp2, funke, by = c("year", "ccode"))
imfimp2$fpopulism[is.na(imfimp2$fpopulism)] <- 0 # NA populism is 0
imfnoimp2 <- left_join(imfnoimp2, funke, by = c("year", "ccode"))
imfnoimp2$fpopulism[is.na(imfnoimp2$fpopulism)] <- 0

constit <- read.csv("constituency.csv") #load hand-coded data on constituency memberships for all IMF members, coded from online resources
constit <- constit[-c(3)]
gdp <- data.frame(imfimp2$ccode, imfimp2$year, imfimp2$gdp) # subset GDP to merge into constituencyn data
colnames(gdp) <- c("ccode", "year", "gdp")
constit <- left_join(unique(constit), unique(gdp), by = c("ccode", "year"))
constitagg <- aggregate(list(constit$gdp), by = list(constit$const, constit$year), FUN = sum, na.rm=T)
colnames(constitagg) <- c("const", "year", "gdptot")
constit <- left_join(constit, constitagg, by = c("const", "year"))
constit$gdpshareconst <- constit$gdp / constit$gdptot
constit <- constit[c(1,2,3,6)]

imfimpc <- imfimp2 # copy data to calculate gdp weighted populism measure
imfimpc <- left_join(imfimpc, constit, by = c("ccode", "year"))
imfimpc$constitw <- imfimpc$constit*imfimpc$gdpshareconst
imfimpc$fpopulism <- ifelse(imfimpc$constit==1,
                            imfimpc$fpopulism*imfimpc$constitw,
                            imfimpc$fpopulism) # weight populism by GDP share of constitutency

imfnoimpc <- imfnoimp2 # copy data to do the same for non-imputed data
imfnoimpc <- left_join(imfnoimpc, constit, by = c("ccode", "year"))
imfnoimpc$constitw <- imfnoimpc$constit*imfnoimpc$gdpshareconst
imfnoimpc$fpopulism <- ifelse(imfnoimpc$constit==1,
                              imfnoimpc$fpopulism*imfnoimpc$constitw,
                              imfnoimpc$fpopulism)
imfimp2 <- imfimp2[-c(5,7,16,19,20,22:27,29:41,46)]
imfimpc <- imfimpc[-c(5,7,16,19,20,22:27,29:41,46)]
imfnoimp2 <- imfnoimp2[-c(5,7,16,19,20,22:27,29:41,46)]
imfnoimpc <- imfnoimpc[-c(5,7,16,19,20,22:27,29:41,46)]

IMF <- read.csv("IMF_cond.csv") # load data from Kentikelenis et al again
IMF <- IMF[!duplicated(IMF$Arrangement.ID),] # eliminate duplicate programs
IMF <- IMF[c(2,8)]
IMF$proginit <- 1 # create binary program initiation variable
colnames(IMF) <- c("ccode", "year", "proginit")
imfimp3 <- left_join(imfimp2, unique(IMF), by = c("ccode", "year"))
imfimp3$proginit[is.na(imfimp3$proginit)] <- 0 # code program initiation

write.csv(imfimpc, "imputed_data_for_analysis.csv") # write to csv; toggled off for now
write.csv(imfnoimpc, "unimputed_data_for_analysis.csv")
write.csv(imfimp3, "imputed_data_for_prog_analysis.csv")

#### Load Analysis Datasets (START HERE FOR REPLICATION) ####
text <- read.csv("text_data_for_analysis.csv")
imfimpc <- read.csv("imputed_data_for_analysis.csv")
imfnoimpc <- read.csv("unimputed_data_for_analysis.csv")
imfimp3 <- read.csv("imputed_data_for_prog_analysis.csv")

colnames(text)[1] <- "rowID" # rename column in line with codebook 
colnames(imfimpc)[1] <- "rowID" # rename column in line with codebook 
colnames(imfnoimpc)[1] <- "rowID" # rename column in line with codebook 
colnames(imfimp3)[1] <- "rowID" # rename column in line with codebook 

#### Descriptive Text Plots ####
# prep data for basic plotting
Corpus.clean <- Corpus(VectorSource(text$text))
DTM <- DocumentTermMatrix(Corpus.clean)
dtm <- tidy(DTM)
dtmagg <- aggregate(dtm$count, by = list(dtm$term), FUN = sum)
colnames(dtmagg) <- c("word", "freq")
dtmagg <- dtmagg[order(-dtmagg$freq),]
head(dtmagg, 10)
dtmagg2 <- dtmagg[!(dtmagg$word %in% c("international", "monetary", "fund", "staff", "also", "will", "board",
                                       "statement", "welcome", "however", "well", "document", "note")),] # delete common words that lack substance

# bar plot of word frequencies in grays
dtmsh <- dtmagg2[1:20,]
dtmsh %>% 
  ggplot(aes(fct_rev(fct_reorder(word, freq)), freq)) +
  geom_col() +
  labs(x="Word", y="Frequency") +
  theme(axis.text.x = element_text(angle = 90, size = 14),
        axis.text.y = element_text(size = 14),
        axis.title = element_text(size = 16)) # Figure A1

# prep data for country-level work
text$count <- 1 # count variable to aggregate
ctryagg <- aggregate(text$text, list(text$ccode, text$ctry, as.numeric(text$year)), paste, collapse=" ") # aggregate by country-year
colnames(ctryagg) <- c("ccode", "ctry", "year", "text") # rename cols
ctryagg2 <- aggregate(text$count, list(text$ccode, as.numeric(text$year)), sum)
colnames(ctryagg2) <- c("ccode", "year", "count")
ctryagg <- merge(ctryagg, ctryagg2, by = c("ccode", "year"), all.x=T)

# sentiment plots
tokens <- data_frame(text = ctryagg$text) %>% 
  unnest_tokens(word, text) # get all words for plotting of positive and negative words

bing_word_counts <- tokens %>%
  inner_join(get_sentiments("loughran")) %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()

bing_word_counts <- bing_word_counts[bing_word_counts$sentiment == "positive" | bing_word_counts$sentiment == "negative",]

bing_word_counts %>%
  group_by(sentiment) %>%
  slice_max(n, n = 10) %>% 
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(n, word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(x = "Contribution to sentiment",
       y = NULL) +
  theme(text = element_text(size=22)) # Figure A7

#### NOTE that the word clouds found in Figure 1 are constructed using the free online
#### program Wordle. We simply copy and paste all text from the Grays in the 
#### text$text column included here into Wordle to generate the plots while
#### subsetting by populist government or not.

#### LDA Plot ####
k <- 10 #number of topics decided by trial and error
seed = 1234 #necessary for reproducibility
#fit the model passing the parameters discussed above
#you could have more control parameters but will just use seed here

ui = unique(DTM$i) # delete empty rows
DTM = DTM[ui,]

lda <- LDA(DTM, k = k, method = "GIBBS", control = list(seed = seed))
num_words <- 10 #number of words to visualize

#create function that accepts the lda model and num word to display
theme_lyrics <- function(aticks = element_blank(),
                         pgminor = element_blank(),
                         lt = element_blank(),
                         lp = "none") {
  theme(plot.title = element_text(hjust = 0.5), #center the title
        axis.ticks = aticks, #set axis ticks to on or off
        panel.grid.minor = pgminor, #turn on or off the minor grid lines
        legend.title = lt, #turn on or off the legend title
        legend.position = lp) #turn on or off the legend
}
word_chart <- function(data, input, title) {
  data %>%
    #set y = 1 to just plot one variable and use word as the label
    ggplot(aes(as.factor(row), 1, label = input, fill = factor(topic) )) +
    #you want the words, not the points
    geom_point(color = "transparent") +
    #make sure the labels don't overlap
    geom_label_repel(nudge_x = .2,  
                     direction = "y",
                     box.padding = 0.1,
                     segment.color = "transparent",
                     size = 3) +
    facet_grid(~topic) +
    theme_lyrics() +
    theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
          #axis.title.x = element_text(size = 9),
          panel.grid = element_blank(), panel.background = element_blank(),
          panel.border = element_rect("lightgray", fill = NA),
          strip.text.x = element_text(size = 9)) +
    labs(x = NULL, y = NULL, title = title) +
    #xlab(NULL) + ylab(NULL) +
    #ggtitle(title) +
    coord_flip()
}
top_terms_per_topic <- function(lda_model, num_words) {
  
  #tidy LDA object to get word, topic, and probability (beta)
  topics_tidy <- tidy(lda_model, matrix = "beta")
  
  
  top_terms <- topics_tidy %>%
    group_by(topic) %>%
    arrange(topic, desc(beta)) %>%
    #get the top num_words PER topic
    slice(seq_len(num_words)) %>%
    arrange(topic, beta) %>%
    #row is required for the word_chart() function
    mutate(row = row_number()) %>%
    ungroup() %>%
    #add the word Topic to the topic labels
    mutate(topic = paste("Topic", topic, sep = " "))
  #create a title to pass to word_chart
  title <- paste("LDA Top Terms for", k, "Topics")
  #call the word_chart function you built in prep work
  word_chart(top_terms, top_terms$term, title)
}
#call the function you just built!
top_terms_per_topic(lda, num_words) # Figure A2

topics_tidy <- tidy(lda, matrix = "gamma")
number_of_documents = 5 #number of top docs to view
top_documents <- topics_tidy %>%
  group_by(topic) %>%
  arrange(topic, desc(gamma)) %>%
  slice(seq_len(number_of_documents)) %>%
  arrange(topic, gamma) %>%
  mutate(row = row_number()) %>%
  ungroup() %>%
  #re-label topics
  mutate(topic = paste("Topic", topic, sep = " "))

title <- paste("LDA Top Documents for", k, "Topics")
word_chart(top_documents, top_documents$document, title)

prtopics <- tidy(lda, matrix = "gamma") %>% filter(document == 1987)
for (i in 1988:2017) {
  temp <- tidy(lda, matrix = "gamma") %>% filter(document == i) # see prob for each topic-document over time
  prtopics <- rbind(prtopics, temp)
}
prtopics <- prtopics[prtopics$topic != 10,]
colnames(prtopics) <- c("Year", "Topic", "Gamma")
prtopics$Topic <- as.factor(prtopics$Topic)
prtopics$Year <- as.numeric(prtopics$Year)
toplab <- c("Global Financial Markets", "Reports and Staff Papers", "Staff Policy and Performance", "Access to Financing",
            "Taxes and Fiscal Policy", "Program Implementation", "Surveillance", "Exchange Rates and Monetary Policy",
            "Labor and Employment")
names(toplab) <- c(1:9)
ggplot(prtopics, aes(x = Year, y = Gamma)) +
  theme_bw() + 
  theme(axis.text.y=element_blank(), axis.ticks.y=element_blank()) + 
  geom_line() +
  facet_wrap(~ Topic, nrow = 3, labeller = labeller(Topic = toplab)) # Figure A3

# Topic 1: global financial markets
text$text[28325] # representative response 

# Topic 2: reports and staff papers
text$text[2231]
text$text[30639]

# Topic 3: staff policymaking and performance
text$text[48877]
text$text[51040]

# Topic 4: access to resources / financing
text$text[40210]
text$text[28469]

# Topic 5: taxes and fiscal policy
text$text[20653]

# Topic 6: program implementation and performance
text$text[26708]

# Topic 7: surveillance and data collection
text$text[17347]
text$text[50903]

# Topic 8: exchange rates and monetary policy
text$text[25755]
text$text[25754]

# Topic 9: labor and unemployment
text$text[7921]
text$text[13274]

#### Descriptive Stats #### 
#plot how often every country speaks
ctry17 <- imfimpc[imfimpc$year == 2017,]

maps <- joinCountryData2Map(ctry17, joinCode = "ISO3", nameJoinColumn = "ccode", nameCountryColumn = "ctry")
maps <- subset(maps, continent != "Antarctica")
mapss <- mapCountryData(mapToPlot = maps, nameColumnToPlot = "countsp", mapRegion = "world",
                        catMethod = c(0, 20, 40, 60, 80), colourPalette = brewer.pal(9, name = "Blues"),
                        addLegend = FALSE, mapTitle = "")
do.call( addMapLegend, c(mapss, legendWidth=0.5, legendMar = 5)) # Figure A5

# plot average number of grays over time
timeagg <- aggregate(imfimpc$countsp, list(imfimpc$year), FUN = mean)
colnames(timeagg) <- c("year", "countsp")
ggplot(timeagg, aes(x=year, y=countsp)) +
  theme_bw() + 
  geom_bar(stat="identity") +
  labs(x = "Year", y = "Private Participation") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        axis.text.x = element_text(size = 16), axis.text.y = element_text(size = 16),
        axis.title.x = element_text(size = 18), axis.title.y = element_text(size = 18)) # Figure A6

# treatment distribution across countries
pan <- imfimpc
pan$ctry <- countrycode(pan$ccode, origin = "iso3c", destination = "country.name")
pan <- aggregate(list(pan$countsp, pan$fpopulism), list(pan$ctry, pan$year), FUN = max)
colnames(pan) <- c("ctry", "year", "countsp", "populism")
pan <- pan[pan$year > 1989 & pan$year < 20189,]
pan$populism[pan$populism == 0] <- NA
panelView(populism~1, data = pan, index = c("ctry","year"), xlab = "Year", ylab = "Country",
          ignore.treat = T, main="", legendOff = T,
          cex.lab=16, cex.axis=12, axis.adjust = 1) # Figure A4

# Descriptive statistics (imputation)
covs_desc <- c("Grays participation", "Populism", "Constituent", "GDPPC", "Polity2", 
               "UN voting (ideal pt dist)", "IMF program", "Debt service / GNI", "Right wing government",
               "Number IOs", "Vote-power asymmetry", "U.S. aid", "UNSC member", "GDP growth", "Unemployment",
               "Sentiment") # covariate labels
desc <- imfimpc # copy main data
desc$rwg <- ifelse(I(desc$execrlc==1), 1, 0)
desc <- desc[,c(16,23,24,22,11,9,19,7,27,5,4,12,14,6,8,17)] # subset to exclude non-numeric and extra variables
desc <- data.frame(desc)
stargazer(desc, omit.summary.stat = c("p25", "p75"), digits = 2, style = "ajps",
          covariate.labels = covs_desc) # Table A1

# Descriptive statistics (no imputation)
desc <- imfnoimpc # copy main data
desc$rwg <- ifelse(I(desc$execrlc==1), 1, 0)
desc <- desc[,c(16,23,24,22,11,9,19,7,27,5,4,12,14,6,8,17)] # subset to exclude non-numeric and extra variables
desc <- data.frame(desc)
stargazer(desc, omit.summary.stat = c("p25", "p75"), digits = 2, style = "ajps",
          covariate.labels = covs_desc) # Table A2

# Descriptive stats on populists participating more, reported in text
t.test(imfimp2$countsp[imfimp2$fpopulism == 1], imfimp2$countsp[imfimp2$fpopulism == 0]) # 24 to 10, p = 0
t.test(imfimp2$sentiment[imfimp2$fpopulism == 1], imfimp2$sentiment[imfimp2$fpopulism == 0]) # [-0.007, 0.036] p = 0.2
t.test(imfimp2$IMFpart[imfimp2$fpopulism == 1], imfimp2$IMFpart[imfimp2$fpopulism == 0]) # [-0.028, 0.07] p = 0.407

# Poland anecdote, fn. 24
poland <- imfimp2[imfimp2$ccode=="POL",]
mean(poland$countsp[poland$fpopulism == 1], na.rm=T) # 41.5 2005-7, 2015-17
mean(poland$countsp[poland$fpopulism == 0], na.rm=T) # 16.1 hard test for our theory if economic explanations matter because crisis is in middle of populist spells

#### Regression Analysis (Grays DV) ####

# lines and labels needed for tables
covs <- c("Populism", "GDPPC", "Polity2", "UN voting (ideal pt dist)", 
          "Right wing government", "Debt service / GNI", "IMF program", "Vote-power asymmetry",
          "U.S. aid", "UNSC member", "GDP growth", "Unemployment") # covariate labels
ctryfe <- list(c("Country fixed effects", "Yes", "Yes", "Yes", "Yes", "Yes")) # create rows denoting model specifications for regression tables
ctryre <- list(c("Country random effects", "Yes", "Yes", "Yes", "Yes", "Yes"))
yearre <- list(c("Year random effects", "Yes", "Yes", "Yes", "Yes", "Yes"))
yearfe <- list(c("Year fixed effects", "Yes", "Yes", "Yes", "Yes", "Yes"))

# private participation ~ populism
regbiv <- lm(log1p(countsp) ~ fpopulism + factor(ccode) + factor(year), data = imfimpc)
(seregbiv <- coeftest(regbiv, vcov=vcovHC(regbiv, type = "HC0", cluster=imfimpc$ccode)))

reg1 <- lm(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
             I(execrlc==1) + dservgni + factor(ccode) + factor(year), data = imfimpc)
(sereg1 <- coeftest(reg1, vcov=vcovHC(reg1, type = "HC0", cluster=imfimpc$ccode)))

reg1a <- lm(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
              I(execrlc==1) + dservgni + IMFpart + asymm + factor(ccode) + factor(year), data = imfimpc)
(sereg1a <- coeftest(reg1a, vcov=vcovHC(reg1a, type = "HC0", cluster=imfimpc$ccode)))

reg1b <- lm(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
              I(execrlc==1) + dservgni + USaid + unsc + factor(ccode) + factor(year), data = imfimpc) 
(sereg1b <- coeftest(reg1b, vcov=vcovHC(reg1b, type = "HC0", cluster=imfimpc$ccode)))

reg1c <- lm(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
              I(execrlc==1) + dservgni + gdpgrow + unemployment + factor(ccode) + factor(year), data = imfimpc) 
(sereg1c <- coeftest(reg1c, vcov=vcovHC(reg1c, type = "HC0", cluster=imfimpc$ccode)))

stargazer(regbiv, reg1, reg1a, reg1b, reg1c, se = list(seregbiv[,2], sereg1[,2], 
                                                       sereg1a[,2], 
                                                       sereg1b[,2], sereg1c[,2]), 
          dep.var.labels = "Covert participation",
          style = "ajps", omit = c("year", "ccode", "Constant"), 
          add.lines = c(yearfe, ctryfe),
          covariate.labels = covs,
          omit.stat = c("ll", "aic", "theta", "rsq", "ser", "F"),
          star.cutoffs = c(0.05, 0.01, 0.001)) # Table 2

# Sentiment ~ populism
regbivh <- lm(sentiment ~ fpopulism + factor(ccode) + factor(year), data = imfimpc)
(seregbivh <- coeftest(regbivh, vcov=vcovHC(regbivh, type = "HC1", cluster=imfimpc$ccode)))

reg1h <- lm(sentiment ~ fpopulism + gdppc + polity2 + absidealdiff + 
              I(execrlc==1) + dservgni + factor(ccode) + factor(year), data = imfimpc)
(sereg1h <- coeftest(reg1h, vcov=vcovHC(reg1h, type = "HC1", cluster=imfimpc$ccode)))

reg1ah <- lm(sentiment ~ fpopulism + gdppc +  polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + IMFpart + asymm + factor(ccode) + factor(year), data = imfimpc)
(sereg1ah <- coeftest(reg1ah, vcov=vcovHC(reg1ah, type = "HC1", cluster=imfimpc$ccode)))

reg1bh <- lm(sentiment ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + USaid + unsc + factor(ccode) + factor(year), data = imfimpc) 
(sereg1bh <- coeftest(reg1bh, vcov=vcovHC(reg1bh, type = "HC1", cluster=imfimpc$ccode)))

reg1ch <- lm(sentiment ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + gdpgrow + unemployment + factor(ccode) + factor(year), data = imfimpc) 
(sereg1ch <- coeftest(reg1ch, vcov=vcovHC(reg1ch, type = "HC1", cluster=imfimpc$ccode)))

stargazer(regbivh, reg1h, reg1ah, reg1bh, reg1ch, se = list(seregbivh[,2], sereg1h[,2], sereg1ah[,2],
                                                            sereg1bh[,2], sereg1ch[,2]), 
          dep.var.labels = "Sentiment",
          style = "ajps", omit = c("ccode", "year", "Constant"), add.lines = c(ctryfe, yearfe),
          covariate.labels = covs,
          omit.stat = c("ll", "aic", "theta", "rsq", "ser", "F"),
          star.cutoffs = c(0.05, 0.01, 0.001)) # Table A7

#### Robustness Checks (Grays DV) ####

# Region fixed effects
imfimpc$region <- countrycode(imfimpc$ccode, origin = "iso3c", destination = "region")
regfe <- list(c("Region fixed effects", "Yes", "Yes", "Yes", "Yes", "Yes"))

regbivr <- lm(log1p(countsp) ~ fpopulism + factor(region) + factor(year), data = imfimpc)
(seregbivr <- coeftest(regbivr, vcov=vcovHC(regbivr, type = "HC0", cluster=imfimpc$ccode)))

reg1r <- lm(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
              I(execrlc==1) + dservgni + factor(region) + factor(year), data = imfimpc)
(sereg1r <- coeftest(reg1r, vcov=vcovHC(reg1r, type = "HC0", cluster=imfimpc$ccode)))

reg1ar <- lm(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + IMFpart + asymm + factor(region) + factor(year), data = imfimpc)
(sereg1ar <- coeftest(reg1ar, vcov=vcovHC(reg1ar, type = "HC0", cluster=imfimpc$ccode)))

reg1br <- lm(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + USaid + unsc + factor(region) + factor(year), data = imfimpc) 
(sereg1br <- coeftest(reg1br, vcov=vcovHC(reg1br, type = "HC0", cluster=imfimpc$ccode)))

reg1cr <- lm(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + gdpgrow + unemployment + factor(region) + factor(year), data = imfimpc) 
(sereg1cr <- coeftest(reg1cr, vcov=vcovHC(reg1cr, type = "HC0", cluster=imfimpc$ccode)))

stargazer(regbivr, reg1ar, reg1br, reg1cr, se = list(seregbivr[,2], sereg1ar[,2],
                                                     sereg1br[,2], sereg1cr[,2]), 
          dep.var.labels = "Grays participation",
          style = "ajps", omit = c("region", "year", "Constant"), add.lines = c(regfe, yearfe),
          covariate.labels = covs,
          omit.stat = c("ll", "aic", "theta", "rsq", "ser", "F"),
          star.cutoffs = c(0.05, 0.01, 0.001)) # Table A3

# Random effects for ctry and year
regbivr <- lmer(log1p(countsp) ~ fpopulism + (1|ccode) + (1|year), data = imfimpc)

reg1r <- lmer(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
                I(execrlc==1) + dservgni + (1|ccode) + (1|year), data = imfimpc)

reg1ar <- lmer(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
                 I(execrlc==1) + dservgni + IMFpart + asymm + (1|ccode) + (1|year), data = imfimpc)

reg1br <- lmer(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
                 I(execrlc==1) + dservgni + USaid + unsc + (1|ccode) + (1|year), data = imfimpc) 

reg1cr <- lmer(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
                 I(execrlc==1) + dservgni + gdpgrow + unemployment + (1|ccode) + (1|year), data = imfimpc) 

stargazer(regbivr, reg1ar, reg1br, reg1cr, 
          dep.var.labels = "Grays participation",
          style = "ajps", omit = c("region", "year", "Constant"), add.lines = c(ctryre, yearre),
          covariate.labels = covs,
          omit.stat = c("ll", "aic", "theta", "rsq", "ser", "F", "bic"),
          star.cutoffs = c(0.05, 0.01, 0.001)) # Table A4

#### Sensitivity analysis (Grays DV) ####
# Countries
scicp_list <- unique(imfimpc$ccode)

scicp_drop_tib <- tibble(SCICP = scicp_list,
                         network_coef = NA,
                         network_se = NA,
                         network_p = NA,
                         network.decline_coef = NA,
                         network.decline_se = NA,
                         network.decline_p = NA)

for (i in 1:186){
  lm(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
       I(execrlc==1) + dservgni + factor(ccode) + factor(year), 
     data = imfimpc[imfimpc$ccode != scicp_list[i],]) -> run1
  tidy(run1) -> run1tidy
  
  scicp_drop_tib$network_coef[i] <- run1tidy$estimate[2]
  scicp_drop_tib$network_se[i] <- run1tidy$std.error[2]
  scicp_drop_tib$network_p[i] <- run1tidy$p.value[2]
  
  scicp_drop_tib$network.decline_coef[i] <- run1tidy$estimate[11]
  scicp_drop_tib$network.decline_se[i] <- run1tidy$std.error[11]
  scicp_drop_tib$network.decline_p[i] <- run1tidy$p.value[11]
  
  print(i)
}
scicp_drop_tib <- scicp_drop_tib[-187,]
scicp_drop_tib$id <- 1:186
ggplot(scicp_drop_tib, aes(x = id, y = network_coef)) +
  geom_point() +
  geom_linerange(aes(ymin = network_coef-1.645*network_se,
                     ymax = network_coef+1.645*network_se)) +
  theme_classic() +
  theme(legend.position = "none") +
  ylab("Populism coefficient") +
  xlab("Country ID") # Figure A8

#### Regression Analysis (Program DV) ####
covs <- c("Populism", "GDPPC", "Polity2", "UN voting (ideal pt dist)", 
          "Right wing government", "Debt service / GNI", "Vote-power asymmetry",
          "U.S. aid", "UNSC member", "GDP growth", "Unemployment") # covariate labels

# Test for program initiation

regbivp <- lm(proginit ~ fpopulism + factor(ccode) + factor(year), data = imfimp3)
(seregbivp <- coeftest(regbivp, vcov=vcovHC(regbivp, type = "HC0", cluster=imfimp3$ccode)))

reg1tp <- lm(proginit ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + factor(ccode) + factor(year), data = imfimp3)
(sereg1tp <- coeftest(reg1tp, vcov=vcovHC(reg1tp, type = "HC0", cluster=imfimp3$ccode)))

reg1ap <- lm(proginit ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + asymm + factor(ccode) + factor(year), data = imfimp3)
(sereg1ap <- coeftest(reg1ap, vcov=vcovHC(reg1ap, type = "HC0", cluster=imfimp3$ccode)))

reg1bp <- lm(proginit ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + USaid + unsc + factor(ccode) + factor(year), data = imfimp3) 
(sereg1bp <- coeftest(reg1bp, vcov=vcovHC(reg1bp, type = "HC0", cluster=imfimp3$ccode)))

reg1cp <- lm(proginit ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + gdpgrow + unemployment + factor(ccode) + factor(year), data = imfimp3) 
(sereg1cp <- coeftest(reg1cp, vcov=vcovHC(reg1cp, type = "HC0", cluster=imfimp3$ccode)))

stargazer(regbivp, reg1tp, reg1ap, reg1bp, reg1cp, se = list(seregbivp[,2], sereg1tp[,2], 
                                                             sereg1ap[,2], 
                                                             sereg1bp[,2], sereg1cp[,2]), 
          dep.var.labels = "IMF program initiation",
          style = "ajps", omit = c("ccode", "year", "Constant"), add.lines = c(ctryfe, yearfe),
          covariate.labels = covs,
          omit.stat = c("ll", "aic", "theta", "rsq", "ser", "F"),
          star.cutoffs = c(0.05, 0.01, 0.001)) # Table 3

#### Robustness Checks (Program DV) #####
# Probit
covs <- c("Populism", "GDPPC", "Polity2", "UN voting (ideal pt dist)", 
          "Right wing government", "Debt service / GNI", "Vote-power asymmetry",
          "U.S. aid", "UNSC member", "GDP growth", "Unemployment") # covariate labels
regbivp <- glm(proginit ~ fpopulism + factor(ccode) + factor(year), data = imfimp3, family = binomial(link = "probit"))
(seregbivp <- coeftest(regbivp, vcov=vcovHC(regbivp, type = "HC0", cluster=imfimp3$ccode)))

reg1tp <- glm(proginit ~ fpopulism + gdppc + polity2 + absidealdiff + 
                I(execrlc==1) + dservgni + factor(ccode) + factor(year), data = imfimp3, family = binomial(link = "probit"))
(sereg1tp <- coeftest(reg1tp, vcov=vcovHC(reg1tp, type = "HC0", cluster=imfimp3$ccode)))

reg1ap <- glm(proginit ~ fpopulism + gdppc + polity2 + absidealdiff + 
                I(execrlc==1) + dservgni + asymm + factor(ccode) + factor(year), data = imfimp3, family = binomial(link = "probit"))
(sereg1ap <- coeftest(reg1ap, vcov=vcovHC(reg1ap, type = "HC0", cluster=imfimp3$ccode)))

reg1bp <- glm(proginit ~ fpopulism + gdppc + polity2 + absidealdiff + 
                I(execrlc==1) + dservgni + USaid + unsc + factor(ccode) + factor(year), data = imfimp3, family = binomial(link = "probit"))
(sereg1bp <- coeftest(reg1bp, vcov=vcovHC(reg1bp, type = "HC0", cluster=imfimp3$ccode)))

reg1cp <- glm(proginit ~ fpopulism + gdppc + polity2 + absidealdiff + 
                I(execrlc==1) + dservgni + gdpgrow + unemployment + factor(ccode) + factor(year), data = imfimp3, family = binomial(link = "probit"))
(sereg1cp <- coeftest(reg1cp, vcov=vcovHC(reg1cp, type = "HC0", cluster=imfimp3$ccode)))

stargazer(regbivp, reg1tp, reg1ap, reg1bp, reg1cp, se = list(seregbivp[,2], sereg1tp[,2], 
                                                             sereg1ap[,2], 
                                                             sereg1bp[,2], sereg1cp[,2]), 
          dep.var.labels = "IMF program initiation",
          style = "ajps", omit = c("ccode", "year", "Constant"), add.lines = c(ctryfe, yearfe),
          covariate.labels = covs,
          omit.stat = c("ll", "aic", "theta", "rsq", "ser", "F"),
          star.cutoffs = c(0.05, 0.01, 0.001)) # Table A10

# Random effects
covs <- c("Populism", "GDPPC", "Polity2", "UN voting (ideal pt dist)", 
          "Right wing government", "Debt service / GNI", "Vote-power asymmetry",
          "U.S. aid", "UNSC member", "GDP growth", "Unemployment") # covariate labels
regbivp <- lmer(proginit ~ fpopulism + (1|ccode) + (1|year), data = imfimp3)

reg1tp <- lmer(proginit ~ fpopulism + gdppc + polity2 + absidealdiff + 
                 I(execrlc==1) + dservgni + (1|ccode) + (1|year), data = imfimp3)

reg1ap <- lmer(proginit ~ fpopulism + gdppc + polity2 + absidealdiff + 
                 I(execrlc==1) + asymm + (1|ccode) + (1|year), data = imfimp3)

reg1bp <- lmer(proginit ~ fpopulism + gdppc + polity2 + absidealdiff + 
                 I(execrlc==1) + dservgni + USaid + unsc + (1|ccode) + (1|year), data = imfimp3)

reg1cp <- lmer(proginit ~ fpopulism + gdppc + polity2 + absidealdiff + 
                 I(execrlc==1) + dservgni + gdpgrow + unemployment + (1|ccode) + (1|year), data = imfimp3)

stargazer(regbivp, reg1tp, reg1ap, reg1bp, reg1cp, 
          dep.var.labels = "IMF program participation",
          style = "ajps", omit = c("ccode", "year", "Constant"), add.lines = c(ctryfe, yearfe),
          covariate.labels = covs,
          omit.stat = c("ll", "aic", "theta", "ser", "F", "bic"),
          star.cutoffs = c(0.05, 0.01, 0.001)) # Table A11

#### Sensitivity Analysis (Program DV) ####
scicp_list <- unique(imfimp3$ccode)

scicp_drop_tib <- tibble(SCICP = scicp_list,
                         network_coef = NA,
                         network_se = NA,
                         network_p = NA,
                         network.decline_coef = NA,
                         network.decline_se = NA,
                         network.decline_p = NA)

for (i in 1:186){
  lm(proginit ~ fpopulism + gdppc + polity2 + absidealdiff + 
       I(execrlc==1) + dservgni + factor(ccode) + factor(year), 
     data = imfimp3[imfimp3$ccode != scicp_list[i],]) -> run1
  tidy(run1) -> run1tidy
  
  scicp_drop_tib$network_coef[i] <- run1tidy$estimate[2]
  scicp_drop_tib$network_se[i] <- run1tidy$std.error[2]
  scicp_drop_tib$network_p[i] <- run1tidy$p.value[2]
  
  scicp_drop_tib$network.decline_coef[i] <- run1tidy$estimate[11]
  scicp_drop_tib$network.decline_se[i] <- run1tidy$std.error[11]
  scicp_drop_tib$network.decline_p[i] <- run1tidy$p.value[11]
  
  print(i)
}
scicp_drop_tib <- scicp_drop_tib[-187,]
scicp_drop_tib$id <- 1:186
ggplot(scicp_drop_tib, aes(x = id, y = network_coef)) +
  geom_point() +
  geom_linerange(aes(ymin = network_coef-1.645*network_se,
                     ymax = network_coef+1.645*network_se)) +
  theme_classic() +
  theme(legend.position = "none") +
  ylab("Populism coefficient") +
  xlab("Country ID") # Figure A11

#### Alternate Grays DV (Average # Words) ####
covs <- c("Populism", "GDPPC", "Polity2", "UN voting (ideal pt dist)", 
          "Right wing government", "Debt service / GNI", "IMF program", "Vote-power asymmetry",
          "U.S. aid", "UNSC member", "GDP growth", "Unemployment") # covariate labels
imfimpc$countwords <- ifelse(!is.na(imfimpc$text), 
                             str_count(imfimpc$text), 0)
imfimpc$avgwords <- imfimpc$countwords / imfimpc$countsp
imfimpc$avgwords[is.na(imfimpc$avgwords)] <- 0

regbivn <- lm(log1p(avgwords) ~ fpopulism + factor(ccode) + factor(year), data = imfimpc)
(seregbivn <- coeftest(regbivn, vcov=vcovHC(regbivn, type = "HC0", cluster=imfimpc$ccode)))

reg1n <- lm(log1p(avgwords) ~ fpopulism + gdppc + polity2 + absidealdiff + 
              I(execrlc==1) + dservgni + factor(ccode) + factor(year), data = imfimpc)
(sereg1n <- coeftest(reg1n, vcov=vcovHC(reg1n, type = "HC0", cluster=imfimpc$ccode)))

reg1an <- lm(log1p(avgwords) ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + IMFpart + asymm + factor(ccode) +  factor(year), data = imfimpc)
(sereg1an <- coeftest(reg1an, vcov=vcovHC(reg1an, type = "HC0", cluster=imfimpc$ccode)))

reg1bn <- lm(log1p(avgwords) ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + USaid + unsc + factor(ccode) + factor(year), data = imfimpc) 
(sereg1bn <- coeftest(reg1bn, vcov=vcovHC(reg1bn, type = "HC0", cluster=imfimpc$ccode)))

reg1cn <- lm(log1p(avgwords) ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + gdpgrow + unemployment + factor(ccode) + factor(year), data = imfimpc) 
(sereg1cn <- coeftest(reg1cn, vcov=vcovHC(reg1cn, type = "HC0", cluster=imfimpc$ccode)))

stargazer(regbivn, reg1n, reg1an, reg1bn, reg1cn, se = list(seregbivn[,2], sereg1n[,2], 
                                                            sereg1an[,2],
                                                            sereg1bn[,2], sereg1cn[,2]), 
          dep.var.labels = "Covert participation",
          style = "ajps", omit = c("ccode", "year", "Constant"), add.lines = c(ctryfe, yearfe),
          covariate.labels = covs,
          omit.stat = c("ll", "aic", "theta", "rsq", "ser", "F")) # Table XX

#### PanelMatch ####
imfimpanel <- imfimpc %>% distinct(ccode, year, .keep_all = TRUE) # create panel data for panelmatch
imfimpanel$ccode <- countrycode(imfimpanel$ccode, origin = "iso3c", destination = "cown")
imfimpanel <- imfimpanel[!is.na(imfimpanel$ccode),]
imfimpanel <- imfimpanel[-c(1, 26)]
imfimpanel$fpopulism <- I(imfimpanel$fpopulism > 0)
imfimpanel$fpopulism <- as.numeric(imfimpanel$fpopulism)

PM.results <- PanelMatch(lag = 3,
                         time.id = "year", 
                         unit.id = "ccode", 
                         treatment = "fpopulism", 
                         refinement.method = "mahalanobis", 
                         data = imfimpanel, 
                         match.missing = TRUE, 
                         size.match = 3, 
                         qoi = "att",
                         outcome.var = "countsp", 
                         lead = 0:4,
                         covs.formula = ~ gdppc + absidealdiff + polity2 + dservgni +
                           I(execrlc==1),
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE)

PE.results <- PanelEstimate(sets = PM.results, 
                            data = imfimpanel, 
                            se.method = "conditional", 
                            number.iterations = 5000, 
                            confidence.level = .95)

plot(PE.results,
     ylab = "Estimated Effect of Populism",
     main="") # Figure A9

covbal <- data.frame(get_covariate_balance(PM.results$att, 
                                           data=imfimpanel, 
                                           covariates=c("absidealdiff",
                                                        "polity2",
                                                        "dservgni",
                                                        "asymm",
                                                        "USaid",
                                                        "unsc",
                                                        "gdpgrow",
                                                        "unemployment"), 
                                           ylim=c(-2,2)))

covbal$time <- c(-3:0)
colnames(covbal) <- c("UN voting (ideal pt dist)",
                      "Polity2",
                      "Debt service / GNI",
                      "Vote-power asymmetry",
                      "U.S. aid",
                      "UNSC member",
                      "GDP growth",
                      "Unemployment",
                      "time")
covbal <- covbal %>% 
  gather(variable, value, 'UN voting (ideal pt dist)':'Unemployment', factor_key=TRUE) 

ggplot(covbal, aes(x = time, y = value, color = variable)) +
  geom_line() +
  labs(x = "Time to Treatment", 
       y = "Balance (Standardized Difference)",
       color = "Variable") +
  ylim(-2,2) +
  theme(panel.grid.minor = element_blank(),
        axis.text.x = element_text(size = 16), axis.text.y = element_text(size = 16),
        axis.title.x = element_text(size = 18), axis.title.y = element_text(size = 18)) # Figure A10

#### Conditionality Test ####
covs_cond <- c("Number of Grays", "Populism", "GDPPC", "Polity2",
               "UN voting (ideal pt dist)", "Right-wing government",
               "Debt service / GNI", "GDP growth", "Unemployment", "U.S. aid",
               "Vote-power asymmetry")
c1 <- glm.nb(count~scale(countsp) + fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + gdpgrow + unemployment + USaid + asymm +
               factor(ccode) + factor(year), imfimpc)
(sec1 <- coeftest(c1, vcov=vcovHC(c1, type = "HC0", cluster=imfimpc$ccode)))

stargazer(c1, se = list(sec1[,2]), 
          dep.var.labels = "Number of conditions",
          style = "ajps", omit = c("year", "ccode", "Constant"), 
          add.lines = c(yearfe, ctryfe),
          covariate.labels = covs_cond,
          omit.stat = c("ll", "aic", "theta", "rsq", "ser", "F"),
          star.cutoffs = c(0.05, 0.01, 0.001)) # Table A6

#### Control for ED transitions ####
eds <- read.csv("imf_executive_directors.csv") # load hand-coded data on EDs and their tenures
eds$ccode <- countrycode(eds$Country, origin = "country.name", destination = "iso3c")
eds <- eds[c(1,3,4)]
colnames(eds) <- c("year", "name", "ccode")
constit <- read.csv("constituency.csv") #load hand-coded data on constituency memberships for all IMF members, coded from online resources
constit <- constit[-c(3)]
ced <- constit[c(1,2,3)]
eds <- left_join(unique(eds), unique(ced), c("ccode", "year"))
eds <- eds[c(1,2,4)]

eds <- left_join(unique(imfimpc), unique(eds), c("const", "year")) # merge with data
eds <- eds %>%
  group_by(ccode) %>%
  mutate(edch = ifelse(name != lag(name), 1, 0)) # code whether ED changed
eds$edch[is.na(eds$edch)] <- 0
edfe <- list(c("ED fixed effects", "Yes", "Yes", "Yes", "Yes", "Yes"))

# ED fixed effects
regbivr <- lm(log1p(countsp) ~ fpopulism + factor(name) + factor(year), data = eds)
(seregbivr <- coeftest(regbivr, vcov=vcovHC(regbivr, type = "HC0", cluster=eds$ccode)))

reg1r <- lm(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
              I(execrlc==1) + dservgni + factor(name) + factor(year), data = eds)
(sereg1r <- coeftest(reg1r, vcov=vcovHC(reg1r, type = "HC0", cluster=eds$ccode)))

reg1ar <- lm(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + IMFpart + asymm + factor(name) + factor(year), data = eds)
(sereg1ar <- coeftest(reg1ar, vcov=vcovHC(reg1ar, type = "HC0", cluster=eds$ccode)))

reg1br <- lm(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + USaid + unsc + factor(name) + factor(year), data = eds) 
(sereg1br <- coeftest(reg1br, vcov=vcovHC(reg1br, type = "HC0", cluster=eds$ccode)))

reg1cr <- lm(log1p(countsp) ~ fpopulism + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + gdpgrow + unemployment + factor(name) + factor(year), data = eds) 
(sereg1cr <- coeftest(reg1cr, vcov=vcovHC(reg1cr, type = "HC0", cluster=eds$ccode)))

stargazer(regbivr, reg1ar, reg1br, reg1cr, se = list(seregbivr[,2], sereg1ar[,2],
                                                     sereg1br[,2], sereg1cr[,2]), 
          dep.var.labels = "Grays participation",
          style = "ajps", omit = c("name", "year", "Constant"), add.lines = c(edfe, yearfe),
          covariate.labels = covs,
          omit.stat = c("ll", "aic", "theta", "rsq", "ser", "F"),
          star.cutoffs = c(0.05, 0.01, 0.001)) # Table A9

covs_ed <- c("Populism", "ED Change", "GDPPC", "Polity2", "UN voting (ideal pt dist)", 
             "Right wing government", "Debt service / GNI", "IMF program", "Vote-power asymmetry",
             "U.S. aid", "UNSC member", "GDP growth", "Unemployment") # covariate labels

# Control for ED change
regbivr <- lm(log1p(countsp) ~ fpopulism + edch + factor(ccode) + factor(year), data = eds)
(seregbivr <- coeftest(regbivr, vcov=vcovHC(regbivr, type = "HC0", cluster=eds$ccode)))

reg1r <- lm(log1p(countsp) ~ fpopulism + edch + gdppc + polity2 + absidealdiff + 
              I(execrlc==1) + dservgni + factor(ccode) + factor(year), data = eds)
(sereg1r <- coeftest(reg1r, vcov=vcovHC(reg1r, type = "HC0", cluster=eds$ccode)))

reg1ar <- lm(log1p(countsp) ~ fpopulism + edch + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + IMFpart + asymm + factor(ccode) + factor(year), data = eds)
(sereg1ar <- coeftest(reg1ar, vcov=vcovHC(reg1ar, type = "HC0", cluster=eds$ccode)))

reg1br <- lm(log1p(countsp) ~ fpopulism + edch + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + USaid + unsc + factor(ccode) + factor(year), data = eds) 
(sereg1br <- coeftest(reg1br, vcov=vcovHC(reg1br, type = "HC0", cluster=eds$ccode)))

reg1cr <- lm(log1p(countsp) ~ fpopulism + edch + gdppc + polity2 + absidealdiff + 
               I(execrlc==1) + dservgni + gdpgrow + unemployment + factor(ccode) + factor(year), data = eds) 
(sereg1cr <- coeftest(reg1cr, vcov=vcovHC(reg1cr, type = "HC0", cluster=eds$ccode)))

stargazer(regbivr, reg1ar, reg1br, reg1cr, se = list(seregbivr[,2], sereg1ar[,2],
                                                     sereg1br[,2], sereg1cr[,2]), 
          dep.var.labels = "Grays participation",
          style = "ajps", omit = c("ccode", "year", "Constant"), add.lines = c(ctryfe, yearfe),
          covariate.labels = covs_ed,
          omit.stat = c("ll", "aic", "theta", "rsq", "ser", "F"),
          star.cutoffs = c(0.05, 0.01, 0.001)) # Table A8

#### ED Tenure by Populism ####
eds$counted <- 1
eds <- eds[eds$fpopulism>0,]
edsagg <- aggregate(list(eds$countsp), list(eds$name), FUN = mean) # create data showing populist-selected EDs
colnames(edsagg) <- c("name", "countsp")
edsr <- read.csv("imf_executive_directors.csv")
edsr$count <- 1
edsr <- aggregate(list(edsr$count), list(edsr$Name), FUN = sum)
colnames(edsr) <- c("name", "tenure")
edsr <- left_join(unique(edsr), unique(edsagg), "name")
edsr$countsp[is.na(edsr$countsp)] <- 0

ggplot(edsr, aes(y = log(tenure), x = countsp)) +
  geom_smooth(method = "lm") +
  geom_point() +
  labs(x = "Average Number of Grays", y = "Tenure (log years)") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        axis.text.x = element_text(size = 16), axis.text.y = element_text(size = 16),
        axis.title.x = element_text(size = 18), axis.title.y = element_text(size = 18)) # Figure A12

#### Italy, Venezuela Cases ####

# Restrict data on number of Grays for several interesting cases
cases <- imfimpc[imfimpc$ccode %in% c("VEN", "ITA"),]
cases$popbin <- ifelse(cases$fpopulism > 0, 1, 0)
casesp <- cases
cases$popbin <- as.factor(cases$popbin)
cases$popbin[cases$ccode=="ITA" & cases$year==2007] <- 1 # correct error for Italy
ggplot(cases[cases$ccode=="ITA" & cases$countsp > 0,], aes(x = year, y = countsp, color = popbin)) +
  geom_col() +
  labs(x = "Year", y = "Number of Grays", color = "Populism", title = "Italy") +
  theme(axis.text.x = element_text(size = 16), axis.text.y = element_text(size = 16),
        axis.title.x = element_text(size = 18), axis.title.y = element_text(size = 18),
        legend.title = element_text(size = 16), title = element_text(size = 18)) # Figure A13

casesi <- casesp %>%
  filter(ccode=="ITA" & popbin==1) %>%
  unnest_tokens(output = word, input = text) %>%
  anti_join(stop_words) %>%
  count(word, sort = TRUE) # get text of grays for italy

casesi %>% 
  filter(n > 1500) %>% 
  mutate(word = reorder(word, n)) %>% 
  ggplot(aes(word, n)) + 
  geom_col() +
  coord_flip() +
  labs(x = "Word \n", y = "\n Count ", title = "Italy") +
  geom_text(aes(label = n), hjust = 1.2, colour = "white", fontface = "bold") +
  theme(plot.title = element_text(hjust = 0.5), 
        axis.title.x = element_text(face="bold", colour="darkblue", size = 12),
        axis.title.y = element_text(face="bold", colour="darkblue", size = 12)) # Figure A14


ggplot(cases[cases$ccode=="VEN" & cases$countsp > 0,], aes(x = year, y = countsp, color = popbin)) +
  geom_col() +
  labs(x = "Year", y = "Number of Grays", color = "Populism", title = "Venezuela") +
  theme(axis.text.x = element_text(size = 16), axis.text.y = element_text(size = 16),
        axis.title.x = element_text(size = 18), axis.title.y = element_text(size = 18),
        legend.title = element_text(size = 16), title = element_text(size = 18)) # Figure A15

casesi <- casesp %>%
  filter(ccode=="VEN" & popbin==1) %>%
  unnest_tokens(output = word, input = text) %>%
  anti_join(stop_words) %>%
  count(word, sort = TRUE) # get text for venezuela

casesi %>% 
  filter(n > 700) %>% 
  mutate(word = reorder(word, n)) %>% 
  ggplot(aes(word, n)) + 
  geom_col() +
  coord_flip() +
  labs(x = "Word \n", y = "\n Count ", title = "Venezuela") +
  geom_text(aes(label = n), hjust = 1.2, colour = "white", fontface = "bold") +
  theme(plot.title = element_text(hjust = 0.5), 
        axis.title.x = element_text(face="bold", colour="darkblue", size = 12),
        axis.title.y = element_text(face="bold", colour="darkblue", size = 12)) # Figure A16

sink(file = NULL)
